* Interpret the commands in this file under Stata version 16 rules.
version 16
* Switch off the --more-- paging prompt so the run does not pause for input.
set more off

***************************************************************************************************
*THIS FILE PRODUCES FIGURE A.VI OF SLEMROD-REHMAN-WASEEM-2020, RESTAT
***************************************************************************************************

/* PANEL A: (FREQ<=10) */

* Panel A. Treatment = observations whose baseline name frequency is at
* most 10. Control = every other observation with a non-missing frequency.
* For each group the script counts observations per year, takes the log,
* divides by the same group's 2006 value, plots both series and exports
* the graph as an EPS file.
use             "$project_data\ITRM_Dislo_Unique_SelfEmployed_BaselineNameFreq.dta", clear

* Stata orders missing values above every number, so the comparison
* namefreq_baseline<=10 evaluates to 0 when the frequency is missing and
* would silently put those observations in the control group. Remove them
* before classifying.
drop            if missing(namefreq_baseline)
g               treat=namefreq_baseline<=10

* Collapse to one row per year-by-group cell. nfilers holds the number of
* observations that fell in the cell.
contract        year treat, freq(nfilers)
g               lnfilers=log(nfilers)

* Normalise each group's series by its own 2006 value, so both lines equal
* 1 in 2006. If a group has no 2006 row, r(mean) is missing and that
* group's series becomes missing.
g               nfnormalized=.
forvalues grp=0/1 {
    qui sum     lnfilers if treat==`grp' & year==2006
    replace     nfnormalized=lnfilers/r(mean) if treat==`grp'
}

* NOTE(review): the y-axis title reads "Log Number of Total Filers", but the
* plotted variable is that log divided by its 2006 value. Confirm the title
* is intended before changing it.
#delimit ;
twoway          (connected nfnormalized year if treat==1, sort clcolor(red) mcolor(red) msize(large) msymbol(S) lwidth(thick))
                (connected nfnormalized year if treat==0, sort clcolor(midblue) mcolor(midblue) msize(large) msymbol(D) lwidth(thick)),
                xtitle(Year) xtitle(, alignment(top))
                ytitle(Log Number of Total Filers) yscale(titlegap(*10))
                xlabel(2006(1)2015)
                ylabel(0.80(0.1)1.20)
                xline(2011.5, lcolor(green) lwidth(thick))
                legend(region(style(none)) label(1 "Treatment") label(2 "Control") order(1 2))
                graphregion(fcolor(white) style(none) color(white) margin(0 2 0 2)) bgcolor(white);
graph           export "$project_output\ExtensiveMargin_BaselineNameFreq_Lessthan10.eps", replace;
#delimit cr

/* PANEL B: (ABOVE VS. BELOW MEDIAN) */

* Panel B. Treatment = observations at or below the median baseline name
* frequency. Control = observations above it. The median is computed over
* rows with year<=2011. The rest of the pipeline counts observations per
* year and group, takes the log, divides by the group's 2006 value, plots
* both series and exports the graph as an EPS file.
use             "$project_data\ITRM_Dislo_Unique_SelfEmployed_BaselineNameFreq.dta", clear

* Stata orders missing values above every number, so a missing frequency
* would fail the <= median test and be counted as control. Remove such rows
* first. summarize ignores missing values, so the median is unaffected.
drop            if missing(namefreq_baseline)

* Save the median straight away so it does not depend on r() surviving the
* commands that follow.
qui sum         namefreq_baseline if year<=2011, d
tempname        cut_median
scalar          `cut_median'=r(p50)
g               treat=namefreq_baseline<=`cut_median'

* Collapse to one row per year-by-group cell. nfilers holds the number of
* observations that fell in the cell.
contract        year treat, freq(nfilers)
g               lnfilers=log(nfilers)

* Normalise each group's series by its own 2006 value, so both lines equal
* 1 in 2006. If a group has no 2006 row, r(mean) is missing and that
* group's series becomes missing.
g               nfnormalized=.
forvalues grp=0/1 {
    qui sum     lnfilers if treat==`grp' & year==2006
    replace     nfnormalized=lnfilers/r(mean) if treat==`grp'
}

* NOTE(review): the y-axis title reads "Log Number of Total Filers", but the
* plotted variable is that log divided by its 2006 value. Confirm the title
* is intended before changing it.
#delimit ;
twoway          (connected nfnormalized year if treat==1, sort clcolor(red) mcolor(red) msize(large) msymbol(S) lwidth(thick))
                (connected nfnormalized year if treat==0, sort clcolor(midblue) mcolor(midblue) msize(large) msymbol(D) lwidth(thick)),
                xtitle(Year) xtitle(, alignment(top))
                ytitle(Log Number of Total Filers) yscale(titlegap(*10))
                xlabel(2006(1)2015)
                ylabel(0.80(0.1)1.20)
                xline(2011.5, lcolor(green) lwidth(thick))
                legend(region(style(none)) label(1 "Treatment") label(2 "Control") order(1 2))
                graphregion(fcolor(white) style(none) color(white) margin(0 2 0 2)) bgcolor(white);
graph           export "$project_output\ExtensiveMargin_BaselineNameFreq_Median.eps", replace;
#delimit cr

/* PANEL C: (BOTTOM VS. TOP QUARTILE) */

* Panel C. Treatment = observations at or below the 25th percentile of
* baseline name frequency. Control = observations above the 75th
* percentile. Observations in between are discarded. Percentiles are
* computed over rows with year<=2011. The rest of the pipeline counts
* observations per year and group, takes the log, divides by the group's
* 2006 value, plots both series and exports the graph as an EPS file.
use             "$project_data\ITRM_Dislo_Unique_SelfEmployed_BaselineNameFreq.dta", clear

* Stata orders missing values above every number. A missing frequency
* would therefore get treat==0 and also survive the middle-range drop
* (missing <= p75 is false), ending up in the top-quartile control group.
* Remove such rows first. summarize ignores missing values, so the
* percentiles are unaffected.
drop            if missing(namefreq_baseline)

* Save both cut-offs straight away so they do not depend on r() surviving
* the commands that follow.
qui sum         namefreq_baseline if year<=2011, d
tempname        cut_low cut_high
scalar          `cut_low'=r(p25)
scalar          `cut_high'=r(p75)

* Keep only the two tails, then flag the bottom tail as treatment.
keep            if namefreq_baseline<=`cut_low' | namefreq_baseline>`cut_high'
g               treat=namefreq_baseline<=`cut_low'

* Collapse to one row per year-by-group cell. nfilers holds the number of
* observations that fell in the cell.
contract        year treat, freq(nfilers)
g               lnfilers=log(nfilers)

* Normalise each group's series by its own 2006 value, so both lines equal
* 1 in 2006. If a group has no 2006 row, r(mean) is missing and that
* group's series becomes missing.
g               nfnormalized=.
forvalues grp=0/1 {
    qui sum     lnfilers if treat==`grp' & year==2006
    replace     nfnormalized=lnfilers/r(mean) if treat==`grp'
}

* NOTE(review): the y-axis title reads "Log Number of Total Filers", but the
* plotted variable is that log divided by its 2006 value. Confirm the title
* is intended before changing it.
#delimit ;
twoway          (connected nfnormalized year if treat==1, sort clcolor(red) mcolor(red) msize(large) msymbol(S) lwidth(thick))
                (connected nfnormalized year if treat==0, sort clcolor(midblue) mcolor(midblue) msize(large) msymbol(D) lwidth(thick)),
                xtitle(Year) xtitle(, alignment(top))
                ytitle(Log Number of Total Filers) yscale(titlegap(*10))
                xlabel(2006(1)2015)
                ylabel(0.80(0.1)1.20)
                xline(2011.5, lcolor(green) lwidth(thick))
                legend(region(style(none)) label(1 "Treatment") label(2 "Control") order(1 2))
                graphregion(fcolor(white) style(none) color(white) margin(0 2 0 2)) bgcolor(white);
graph           export "$project_output\ExtensiveMargin_BaselineNameFreq_Quartile.eps", replace;
#delimit cr

/* PANEL D: (BOTTOM VS. TOP DECILE) */

* Panel D. Treatment = observations at or below the 10th percentile of
* baseline name frequency. Control = observations above the 90th
* percentile. Observations in between are discarded. Percentiles are
* computed over rows with year<=2011. The rest of the pipeline counts
* observations per year and group, takes the log, divides by the group's
* 2006 value, plots both series and exports the graph as an EPS file.
use             "$project_data\ITRM_Dislo_Unique_SelfEmployed_BaselineNameFreq.dta", clear

* Stata orders missing values above every number. A missing frequency
* would therefore get treat==0 and also survive the middle-range drop
* (missing <= p90 is false), ending up in the top-decile control group.
* Remove such rows first. summarize ignores missing values, so the
* percentiles are unaffected.
drop            if missing(namefreq_baseline)

* Save both cut-offs straight away so they do not depend on r() surviving
* the commands that follow.
qui sum         namefreq_baseline if year<=2011, d
tempname        cut_low cut_high
scalar          `cut_low'=r(p10)
scalar          `cut_high'=r(p90)

* Keep only the two tails, then flag the bottom tail as treatment.
keep            if namefreq_baseline<=`cut_low' | namefreq_baseline>`cut_high'
g               treat=namefreq_baseline<=`cut_low'

* Collapse to one row per year-by-group cell. nfilers holds the number of
* observations that fell in the cell.
contract        year treat, freq(nfilers)
g               lnfilers=log(nfilers)

* Normalise each group's series by its own 2006 value, so both lines equal
* 1 in 2006. If a group has no 2006 row, r(mean) is missing and that
* group's series becomes missing.
g               nfnormalized=.
forvalues grp=0/1 {
    qui sum     lnfilers if treat==`grp' & year==2006
    replace     nfnormalized=lnfilers/r(mean) if treat==`grp'
}

* NOTE(review): the y-axis title reads "Log Number of Total Filers", but the
* plotted variable is that log divided by its 2006 value. Confirm the title
* is intended before changing it.
#delimit ;
twoway          (connected nfnormalized year if treat==1, sort clcolor(red) mcolor(red) msize(large) msymbol(S) lwidth(thick))
                (connected nfnormalized year if treat==0, sort clcolor(midblue) mcolor(midblue) msize(large) msymbol(D) lwidth(thick)),
                xtitle(Year) xtitle(, alignment(top))
                ytitle(Log Number of Total Filers) yscale(titlegap(*10))
                xlabel(2006(1)2015)
                ylabel(0.80(0.1)1.20)
                xline(2011.5, lcolor(green) lwidth(thick))
                legend(region(style(none)) label(1 "Treatment") label(2 "Control") order(1 2))
                graphregion(fcolor(white) style(none) color(white) margin(0 2 0 2)) bgcolor(white);
graph           export "$project_output\ExtensiveMargin_BaselineNameFreq_Decile.eps", replace;
#delimit cr

